################################################################################################
#######  Volha Charnysh. 2024. Uprooted: How post-WWII Population Transfers Remade Europe. CUP
#######  Replication for analysis in Chapter 6. Municipality analysis for Schleswig-Holstein
######## R version 3.6.3 (2020-02-29) -- "Holding the Windsock"
######## Platform: x86_64-apple-darwin15.6.0 (64-bit)
########################################################

# NOTE: the workspace is deliberately not cleared here. `rm(list = ls())`
# deletes every object in the caller's global environment when this file is
# source()d, yet it does not detach packages or reset options, so it does not
# provide a clean session. Run this script in a fresh R session instead.

# Packages are attached in the original order so that function masking between
# them is unchanged.
library(knitr) #for knitting
library(ggplot2) #for plotting
library(plyr)
library(openxlsx)
library(stargazer)
library(sandwich)
library(lmtest)
library(AER)
library(ei) #for ecological inference
library(MCMCvis)


# Load the municipality-level data set (path is relative to the working
# directory).
dat4 <- read.xlsx("chapter6/Schleswig-municipal-data.xlsx")

## OUTCOME VARIABLES (1949 election)
# Turnout: ballots cast divided by eligible voters.
dat4$Turnout1949 <- with(dat4, Abgegebene_Zettel_1949 / Wahlberechtigte_1949)
# Party vote shares: party votes divided by valid votes.
dat4$PtSPD1949 <- with(dat4, SPD_1949 / Gültig_1949)
dat4$PtCDU1949 <- with(dat4, CDU_1949 / Gültig_1949)

#####################################################################################
############## Table A.11 VOTE IN THE 1949 election at the municipal level ##########
#####################################################################################

# Natural log of the 1939 population; enters every specification below.
dat4$LnPop1939 <- log(dat4$Pop1939)

# Estimation sample: rows with a non-missing 1949 turnout.
dat5 <- subset(dat4, !is.na(Turnout1949))

# Three OLS models with an identical right-hand side; only the outcome differs.
# Kreis1950 enters as a control and is omitted from the printed table.
# The formulas are deliberately written out literally in each lm() call rather
# than built programmatically, so the stored model calls stay exactly as before.
lm1 <- lm(Turnout1949 ~ PctExpellees46 + ShareDestr46 + ShareCath50 +
            ShareErwAgr50 + LnPop1939 + distGDRkm + Kreis1950,
          data = dat5)
summary(lm1)

lm2 <- lm(PtSPD1949 ~ PctExpellees46 + ShareDestr46 + ShareCath50 +
            ShareErwAgr50 + LnPop1939 + distGDRkm + Kreis1950,
          data = dat5)
summary(lm2)

lm3 <- lm(PtCDU1949 ~ PctExpellees46 + ShareDestr46 + ShareCath50 +
            ShareErwAgr50 + LnPop1939 + distGDRkm + Kreis1950,
          data = dat5)
summary(lm3)

# HC1 heteroskedasticity-robust coefficient tests, computed once per model.
# (Previously the same coeftest()/vcovHC() pair was evaluated twice per model:
# once for the standard errors and once for the p-values.)
robust1 <- lapply(list(lm1, lm2, lm3), function(model) {
  coeftest(model, vcov = vcovHC(model, type = "HC1"))
})

# Column 2 of a coeftest table holds the standard errors, column 4 the p-values.
ses1 <- lapply(robust1, function(ct) ct[, 2])
pvals1 <- lapply(robust1, function(ct) ct[, 4])

# Regression table with the robust standard errors and p-values substituted for
# the default OLS ones.
stargazer(
  lm1, lm2, lm3,
  se = ses1,
  p = pvals1,
  digits = 2,
  covariate.labels = c(
    "Share expellees (1946)",
    "Share destroyed",
    "Share Catholic (1950)",
    "Share in agriculture (1950)",
    "Ln population (1939)",
    "Dist to the Eastern border"
  ),
  omit = c("Kreis1950", "Constant"),
  omit.stat = c("ser", "f", "rsq"),
  no.space = TRUE,
  column.labels = c("Turnout 1949", "SPD 1949", "CDU 1949"),
  title = "Results in the 1949 election at the municipal level in Schleswig-Holstein. Robust SEs in parentheses."
)

##################################################
############## ECOLOGICAL INFERENCE ##############
##################################################

#Note: results vary slightly with each run of the code, so they will not perfectly match numbers reported on p. 155.

############ 2x2 EI without Covariates ###########
##### SPD #####
## Input table for ei(): one row per row of dat4.
data.1949.SPD <- data.frame(
  n = dat4$Gültig_1949,                    # total valid votes
  imm = dat4$PctExpellees46,               # expellee variable (PctExpellees46)
  SPD = dat4$SPD_1949 / dat4$Gültig_1949,  # SPD votes over valid votes
  stringsAsFactors = FALSE
)
## NOTE(review): ei() works with proportions -- confirm PctExpellees46 is on
## a 0-1 scale.

## Drop rows with a missing value in any of the three columns.
data.1949.SPD <- na.omit(data.1949.SPD)

## Run ei: SPD share on expellee share, with n as the unit total.
formula_imm_parties.SPD <- SPD ~ imm
set.seed(02139)
result_imm_parties.SPD <- ei(
  formula = formula_imm_parties.SPD,
  total = "n",
  data = data.1949.SPD
)

## Keep the aggregate quantities of interest from the ei summary.
SPD.result <- summary(result_imm_parties.SPD)[["Estimates of Aggregate Quantities of Interest"]]



###### CDU ######
## Input table for ei(): one row per row of dat4.
data.1949.CDU <- data.frame(
  n = dat4$Gültig_1949,                    # total valid votes
  imm = dat4$PctExpellees46,               # expellee variable (PctExpellees46)
  CDU = dat4$CDU_1949 / dat4$Gültig_1949,  # CDU votes over valid votes
  stringsAsFactors = FALSE
)

## Drop rows with a missing value in any of the three columns.
data.1949.CDU <- na.omit(data.1949.CDU)

## Run ei: CDU share on expellee share, with n as the unit total.
## NOTE(review): this call passes sample = 100, whereas the SPD and turnout
## calls do not set `sample`. Kept as is so the output is unchanged -- confirm
## whether the difference is intended.
formula_imm_parties.CDU <- CDU ~ imm
set.seed(02139)
result_imm_parties.CDU <- ei(
  formula = formula_imm_parties.CDU,
  data = data.1949.CDU,
  total = "n",
  sample = 100
)

## Keep the aggregate quantities of interest from the ei summary.
cdu.result <- summary(result_imm_parties.CDU)[["Estimates of Aggregate Quantities of Interest"]]

######## Turnout ########
## Input table for ei(): one row per row of dat4.
data.1949.turnout <- data.frame(
  n = dat4$Wahlberechtigte_1949,  # total eligible voters
  imm = dat4$PctExpellees46,      # expellee variable (PctExpellees46)
  turnout = dat4$Turnout1949,     # turnout as already stored in dat4
  stringsAsFactors = FALSE
)

## Drop rows with a missing value in any of the three columns.
data.1949.turnout <- na.omit(data.1949.turnout)

## Run ei: turnout on expellee share, with n as the unit total.
formula_imm_parties.turnout <- turnout ~ imm
set.seed(02139)
result_imm_parties.turnout <- ei(
  formula = formula_imm_parties.turnout,
  total = "n",
  data = data.1949.turnout
)

## Keep the aggregate quantities of interest from the ei summary.
turnout.result <- summary(result_imm_parties.turnout)[["Estimates of Aggregate Quantities of Interest"]]


